from Bio.SeqUtils.ProtParam import ProteinAnalysis
import re

def read(filename):
    """Read the sequences from a FASTA file.

    Lines starting with ``>`` are treated as record headers. All sequence
    lines that follow a header (up to the next header or end of file) are
    joined into one string, so a sequence wrapped over several lines is
    returned as a single entry. Blank lines are ignored.

    Args:
        filename: Path of the FASTA file, opened as UTF-8 text.

    Returns:
        A list with one sequence string per record, in file order. Records
        with no sequence lines are omitted.
    """
    seqs = []
    chunks = []  # sequence lines of the record currently being read
    with open(filename, "r", encoding="utf8") as f:
        for line in f:
            line = line.strip()
            if not line:
                # Blank lines carry no data; indexing line[0] would fail here.
                continue
            if line.startswith(">"):
                # A new header closes the previous record, if it had data.
                if chunks:
                    seqs.append("".join(chunks))
                    chunks = []
            else:
                chunks.append(line)
    # Flush the last record, which has no following header to close it.
    if chunks:
        seqs.append("".join(chunks))
    return seqs

def cleave(seqs):
    """Cut each sequence after every R or K residue using a regex split.

    Args:
        seqs: Iterable of sequence strings.

    Returns:
        A flat list of the resulting non-empty fragments, in order of
        appearance across all input sequences.
    """
    # NOTE(review): this cuts after every R/K; the commonly used
    # "no cleavage before proline" exception is not applied - confirm
    # whether that is intended.
    trypsin_pattern = re.compile(r'(?<=[RK])')
    peptides = []
    for seq in seqs:
        # Splitting on a zero-width match leaves an empty string after a
        # terminal R/K (and for an empty sequence); drop those fragments.
        peptides.extend(
            fragment for fragment in trypsin_pattern.split(seq) if fragment
        )
    return peptides

def PrintWeight(peptides):
    """Compute and print a tab-separated table of peptide weights.

    Prints a header row, then one row per peptide containing a 1-based
    running ID, the value returned by
    ``ProteinAnalysis(peptide).molecular_weight()`` formatted to two
    decimal places, and the peptide sequence itself.

    Args:
        peptides: Iterable of peptide sequence strings.
    """
    print("ID\tweight (g/mol)\tpolypeptide sequence")
    for number, fragment in enumerate(peptides, start=1):
        mass = ProteinAnalysis(fragment).molecular_weight()
        print(f"{number}\t{mass:0.2f}\t{fragment}")

if __name__ == "__main__":
    # Script entry point: read the FASTA file, cleave the sequences, and
    # print the weight table for the resulting peptides.
    PrintWeight(cleave(read("P01308.fasta")))